import ipywidgets as widgets
from ipywidgets import interact, interact_manual
import pandas as pd
# Load the white-wine quality dataset and take a first look at it.
# NOTE(review): the widely distributed UCI copy of this file is ';'-delimited;
# if that is the file used here, read_csv needs sep=';' — confirm.
wine_data = pd.read_csv(filepath_or_buffer='winequality-white.csv')
# The bare expressions below only show output in a notebook/REPL session.
wine_data.head(n=5)     # first five rows
wine_data.columns       # column labels as loaded from the file
wine_data.tail(n=5)     # last five rows
wine_data.shape[0]      # number of rows
wine_data.describe()    # per-column summary statistics
import seaborn as sns
import matplotlib.pyplot as plt
# Reload the dataset and assign snake_case column names so that columns can
# be accessed as attributes later on (e.g. wine_data.fixed_acidity).
wine_data = pd.read_csv('winequality-white.csv')
wine_data.columns = [
    'fixed_acidity',
    'volatile_acidity',
    'citric_acid',
    'residual_sugar',
    'chlorides',
    'free_sulfur_dioxide',
    'total_sulfur_dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
    'quality',
]
wine_data.tail(n=10)
# Plain matplotlib histogram of the alcohol column.
plt.hist(wine_data['alcohol'])
# Seaborn version: histogram bars with a Gaussian kernel density estimate
# (KDE) drawn on top.
sns.distplot(wine_data['alcohol'], kde=True)
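# Observation: the distribution peaks at an alcohol content of about 9.3, where the curve reaches roughly 0.4 (with kde=True the y-axis is a density, not a percentage of samples).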
# Histogram + KDE of alcohol again, this time on a wider 15x5 figure.
f, ax = plt.subplots(figsize=(15, 5))
sns.distplot(wine_data['alcohol'], kde=True, ax=ax)
# Histogram only (no KDE), with 200 bins and a rug of the individual values.
f, ax = plt.subplots(figsize=(15, 5))
sns.distplot(wine_data['alcohol'], bins=200, kde=False, rug=True, ax=ax)
# rug: whether to draw a rug plot (one small tick per observation) on the support axis.
# bins: controls the granularity of the histogram bars; more bins give narrower bars, so the data can be examined in finer detail.
# Stand-alone rug plot of alcohol; as noted by the author, most of the rug
# ticks are clustered around the centre of the range.
f, ax = plt.subplots(figsize=(15, 5))
sns.rugplot(wine_data['alcohol'], height=0.75, ax=ax)
# KDE curve plus rug, with the histogram bars switched off.
f, ax = plt.subplots(figsize=(15, 5))
sns.distplot(wine_data['alcohol'], hist=False, kde=True, rug=True, ax=ax)
# Filled, red KDE of the alcohol column.
# NOTE(review): recent seaborn releases deprecate `shade` (in favour of
# `fill`) and `bw` (in favour of `bw_method` / `bw_adjust`); the calls are
# kept as written here — confirm the target seaborn version before migrating.
f, ax = plt.subplots(figsize=(15, 5))
sns.set(color_codes=True)
sns.kdeplot(wine_data['alcohol'], shade=True, color='r', ax=ax)
# Default KDE, then four more curves on the same axes to compare the effect
# of the bandwidth: small values follow the data closely, large ones smooth it.
f, ax = plt.subplots(figsize=(15, 5))
sns.kdeplot(wine_data['alcohol'], ax=ax)
for _bandwidth in (0.04, 0.2, 2, 5):
    sns.kdeplot(
        wine_data['alcohol'],
        bw=_bandwidth,
        label=f'bw ={_bandwidth} ',
        ax=ax,
    )
# Joint distribution of free vs. total sulfur dioxide in three flavours:
# scatter, hexbin and bivariate KDE.
# Axis limits are given as ordered (min, max) tuples; a set literal such as
# {0, 150} has no guaranteed iteration order, so the limits could be swapped.
sns.jointplot(
    x=wine_data.free_sulfur_dioxide,
    y=wine_data.total_sulfur_dioxide,
    xlim=(0, 150),
    ylim=(0, 400),
    alpha=0.2,  # alpha (point opacity) is still accepted here
)
sns.jointplot(
    x=wine_data.free_sulfur_dioxide,
    y=wine_data.total_sulfur_dioxide,
    kind='hex',
    xlim=(0, 100),
    ylim=(0, 200),
)
sns.jointplot(
    x=wine_data.free_sulfur_dioxide,
    y=wine_data.total_sulfur_dioxide,
    kind='kde',
    xlim=(0, 80),
    ylim=(0, 300),
)
# Bivariate KDE of free vs. total sulfur dioxide, with a rug for each
# variable drawn along its own axis.
f, ax = plt.subplots(figsize=(8, 5))
# x and y are passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally, so kdeplot(a, b) raises TypeError there.
# NOTE(review): the x=/y= keywords require seaborn >= 0.11.
sns.kdeplot(x=wine_data.free_sulfur_dioxide, y=wine_data.total_sulfur_dioxide)
sns.rugplot(x=wine_data.free_sulfur_dioxide, color='g', height=0.05)
# Assigning the data to `y` replaces the deprecated vertical=True flag.
sns.rugplot(y=wine_data.total_sulfur_dioxide, color='b', height=0.05)
plt.xlim(-20, 100)
plt.ylim(-50, 400)
# sns.pairplot(wine_data, height=3)  # plotting every column takes a long time to run
# Pairwise plots restricted to a handful of columns, with KDE curves on the
# diagonal.
sns.pairplot(
    data=wine_data,
    vars=['fixed_acidity', 'chlorides', 'sulphates', 'alcohol', 'quality'],
    diag_kind='kde',
    height=3,
)
# Same idea on three columns, with a regression fit in each panel.
sns.pairplot(
    data=wine_data,
    vars=['chlorides', 'sulphates', 'quality'],
    kind='reg',
    height=3,
)
# PairGrid builds the grid of axes first; what is drawn is chosen afterwards
# with the map* methods.

# Same plotting function in every cell.
g = sns.PairGrid(
    data=wine_data,
    vars=['chlorides', 'sulphates', 'quality'],
    height=3,
)
g.map(plt.scatter)

# Coloured by quality: scatter off the diagonal, KDE on the diagonal.
g = sns.PairGrid(
    data=wine_data,
    vars=['chlorides', 'sulphates', 'alcohol'],
    hue='quality',
)
g.map_offdiag(plt.scatter)
g.map_diag(sns.kdeplot)
plt.legend(loc=2)

# A different plot type for the diagonal, upper and lower triangles.
g = sns.PairGrid(data=wine_data, vars=['chlorides', 'sulphates', 'alcohol'])
g.map_diag(sns.kdeplot)
g.map_upper(sns.scatterplot)
g.map_lower(sns.regplot)

# Rectangular grid (three x variables against alcohol), drawn once with
# matplotlib's scatter and once with seaborn's scatterplot for comparison;
# the author finds the seaborn version better looking.
# g.map_diag(sns.kdeplot)
# g.map_upper(sns.scatterplot)
# g.map_lower(sns.regplot)
for _scatter_func in (plt.scatter, sns.scatterplot):
    g = sns.PairGrid(
        data=wine_data,
        x_vars=['chlorides', 'sulphates', 'fixed_acidity'],
        y_vars=['alcohol'],
    )
    g.map(_scatter_func)
# sns.pairplot(wine_data, height=3, vars=['fixed_acidity','chlorides','sulphates', 'quality'], hue = 'pH')
# Pairwise column correlations, shown as an annotated heatmap with square
# cells and values formatted to two decimals.
corrmat = wine_data.corr()
f, ax = plt.subplots(figsize=(10, 10))
# Alternative with fixed colour limits and a different colormap:
# sns.heatmap(corrmat, vmin = -0.9, vmax=0.95, square = True, annot= True, fmt='.2f', cmap='summer' )
sns.heatmap(data=corrmat, square=True, annot=True, fmt='0.2f', ax=ax)
# Linear-model plots: scatter of two columns with a fitted regression line.
sns.lmplot(x='residual_sugar', y='density', data=wine_data, height=7, aspect=2)
sns.lmplot(x='quality', y='alcohol', data=wine_data)
# x_jitter adds horizontal noise to the plotted points so that overlapping
# markers at the same quality value become visible.
sns.lmplot(x='quality', y='alcohol', data=wine_data, x_jitter=.2)
import numpy as np
# x_estimator=np.mean plots the mean alcohol for each distinct quality value
# instead of every individual observation.
sns.lmplot(x='quality', y='alcohol', data=wine_data, x_estimator=np.mean)
# Fixed acidity vs. pH per quality category with hue=alcohol (row layout needs tuning):
# sns.lmplot(x='pH',y='fixed_acidity', data= wine_data, row='quality', hue='alcohol')
# sns.lmplot(x='pH',y='fixed_acidity', data= wine_data, col='quality', hue='alcohol')
# x and y are passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally, so regplot(a, b) raises TypeError there. The keyword form
# also works on older releases.
sns.regplot(x=wine_data.alcohol, y=wine_data.density, color='0')
# Regression plot drawn on an explicitly created axes, which is how the
# figure size and shape are controlled for axes-level functions.
f, ax = plt.subplots(figsize=(12, 5))
sns.regplot(x=wine_data.residual_sugar, y=wine_data.density, ax=ax)
# Joint plot with a regression fit. Limits are ordered (min, max) tuples;
# a set literal such as {0, 150} has no guaranteed iteration order.
sns.jointplot(
    x=wine_data.free_sulfur_dioxide,
    y=wine_data.total_sulfur_dioxide,
    xlim=(0, 150),
    ylim=(0, 400),
    kind='reg',
)
# Regression of alcohol on three different columns, one panel each.
sns.pairplot(
    wine_data,
    x_vars=['fixed_acidity', 'citric_acid', 'chlorides'],
    y_vars=['alcohol'],
    kind='reg',
    height=8,
    aspect=1,
)
# Tour of seaborn's categorical plots, mostly alcohol grouped by quality.
# NOTE(review): run as a plain script, consecutive calls draw onto whichever
# axes is current; in the notebook each cell presumably had its own figure.
f, ax = plt.subplots(figsize=(15, 5))
_alcohol_by_quality = dict(x='quality', y='alcohol', data=wine_data)
_ph_by_quality = dict(x='quality', y='pH', data=wine_data)

sns.stripplot(**_alcohol_by_quality)
sns.swarmplot(**_alcohol_by_quality)
sns.dogplot()  # haha (seaborn easter egg)
sns.boxplot(**_alcohol_by_quality)
# Violin plots: default, scaled by observation count, and with one stick per
# observation inside each violin.
sns.violinplot(**_alcohol_by_quality)
sns.violinplot(scale='count', **_alcohol_by_quality)
sns.violinplot(scale='count', inner='stick', **_alcohol_by_quality)
# Violin with a swarm drawn afterwards.
sns.violinplot(**_alcohol_by_quality)
sns.swarmplot(**_alcohol_by_quality)
sns.barplot(**_ph_by_quality)
# Number of rows per distinct alcohol value; tick labels are rotated so they
# do not overlap.
sns.countplot(x='alcohol', data=wine_data)
sns.countplot(x='alcohol', data=wine_data, color='m', palette='Greens_d')
plt.xticks(rotation=90)
sns.pointplot(**_ph_by_quality)
# One horizontal box per column of the frame.
sns.boxplot(data=wine_data, orient='h')
# Titanic passenger data, used to demonstrate FacetGrid.
titanic = pd.read_csv('titanic.csv')
titanic.head(n=5)

# A FacetGrid on its own only lays out the (empty) panels; nothing is drawn
# until a plotting function is mapped onto it.
g = sns.FacetGrid(data=titanic, col='Pclass')

# Histogram of Pclass, one panel per Survived value.
g = sns.FacetGrid(data=titanic, col='Survived')
g.map(plt.hist, 'Pclass', color='k')

# Age vs. Fare, one panel per Survived value.
g = sns.FacetGrid(data=titanic, col='Survived', height=8)
g.map(plt.scatter, 'Age', 'Fare')

# Same layout coloured by Sex, first as a scatter and then as bars.
# alpha is the opacity of the marks.
for _draw in (plt.scatter, plt.bar):
    g = sns.FacetGrid(data=titanic, col='Survived', height=8, hue='Sex')
    g.map(_draw, 'Age', 'Fare', alpha=0.99)
    g.add_legend()

# Adding row='Pclass' gives one panel per (Pclass, Survived) combination.
g = sns.FacetGrid(data=titanic, col='Survived', row='Pclass', height=8, hue='Sex')
g.map(plt.bar, 'Age', 'Fare', alpha=0.99)
g.add_legend()

g = sns.FacetGrid(data=titanic, col='Survived', row='Pclass', height=8, hue='Sex')
g.map(sns.barplot, 'Age', 'Fare')

g = sns.FacetGrid(data=titanic, col='Survived', row='Pclass', height=8, hue='Sex')
g.map(plt.scatter, 'Age', 'Fare', alpha=0.99)
g.add_legend()
# Custom hue colours: map each Sex value to an explicit colour code.
h = {"male": 'b', "female": 'r'}
g = sns.FacetGrid(
    data=titanic,
    col='Survived',
    row='Pclass',
    height=8,
    hue='Sex',
    palette=h,
)
g.map(plt.scatter, 'Age', 'Fare', alpha=0.99)  # alpha is the opacity
g.add_legend()

# col_wrap limits the number of panels per row of the grid.
g = sns.FacetGrid(data=titanic, col='Survived', height=8, col_wrap=4)
g.map(sns.barplot, 'Age', 'Fare')

# Bar styling shared by the next two grids; the fill colour is given as a
# hex string.
_bar_style = dict(color='#334488', edgecolor='red', lw=.5)

g = sns.FacetGrid(data=titanic, col='Survived', row='Pclass', height=8, hue='Sex')
g.map(sns.barplot, 'Age', 'Fare', **_bar_style)
plt.xticks(rotation=90)
# Add horizontal and vertical space between the panels.
g.fig.subplots_adjust(wspace=0.3, hspace=0.5)

# Same grid without hue, plus custom axis labels.
g = sns.FacetGrid(data=titanic, col='Survived', row='Pclass', height=8)
g.map(sns.barplot, 'Age', 'Fare', **_bar_style)
plt.xticks(rotation=90)
g.fig.subplots_adjust(wspace=0.3, hspace=0.5)
g.set_axis_labels('Age of Passengers', 'Fare of each Passenger')
# Age vs. Fare scatter per Survived value, coloured by Sex, with explicit
# y ticks every 50 units from 0 to 500.
g = sns.FacetGrid(data=titanic, col='Survived', height=8, hue='Sex')
g.map(sns.scatterplot, 'Age', 'Fare')
g.fig.subplots_adjust(wspace=0.3, hspace=0.5)  # spacing between panels
g.set_axis_labels('Age of Passengers', 'Fare of each Passenger')
g.set(yticks=list(range(0, 501, 50)))

# Two ways of restricting the view to Age 0-16 and Fare 100-500.
_zoom = dict(xlim=(0, 16), ylim=(100, 500))

# 1) Pass the limits when the grid is created.
g = sns.FacetGrid(data=titanic, col='Survived', height=5, hue='Sex', **_zoom)
g.map(sns.scatterplot, 'Age', 'Fare')
g.fig.subplots_adjust(wspace=0.3, hspace=0.5)
g.set_axis_labels('Age of Passengers', 'Fare of each Passenger')

# 2) Set the limits on the finished grid.
g = sns.FacetGrid(data=titanic, col='Survived', height=5, hue='Sex')
g.map(sns.scatterplot, 'Age', 'Fare')
g.fig.subplots_adjust(wspace=0.3, hspace=0.5)
g.set_axis_labels('Age of Passengers', 'Fare of each Passenger')
g.set(**_zoom)
# Daily bike-sharing data; columns are renamed to more descriptive labels.
bike = pd.read_csv('bike_sharing_daily.csv')
bike.head(n=5)
bike.columns
bike.columns = [
    'instant',
    'date',
    'season',
    'year',
    'month',
    'holiday',
    'weekday',
    'workingday',
    'weathersituation',
    'temp',
    'atemp',
    'humidity',
    'windspeed',
    'casual_users',
    'registered_users',
    'cnt',
]
# Author's note (bike.cnt.head(200)): the daily bike count may exceed 8000 on
# some days and be as low as about 100 on others.
# Distribution of the daily count on a white-grid style, first as a plain
# histogram with rug, then with the KDE curve added.
# sns.set_xticklabels=([0,500,1000,1500,2000,2500,3000,3500,4000,4500,5000,5500,6000,6500,7000,7500,8000,8500,9000])
for _with_kde in (False, True):
    f, ax = plt.subplots(figsize=(15, 5))
    sns.set(style="whitegrid")
    sns.distplot(bike.cnt, bins=120, kde=_with_kde, rug=True, color='k')
# Number of bikes hired as a function of the day index, the month and the
# season.
# x and y are passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally, so jointplot(a, b) raises TypeError there. The keyword form
# also works on older releases.
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='g')
sns.jointplot(x=bike.month, y=bike.cnt, height=8, color='g')
# Author's observation: counts are higher in seasons 3 and 4.
sns.jointplot(x=bike.season, y=bike.cnt)
# Count against temperature, humidity and wind speed, coloured by season.
sns.pairplot(
    bike,
    height=6,
    aspect=1.2,
    x_vars=['temp', 'humidity', 'windspeed'],
    y_vars='cnt',
    hue='season',
)
# sns.set_style('darkgrid')  # global: affects every plot that follows
# f,ax = plt.subplots(figsize=(15,5))
# sns.scatterplot(bike.cnt)
# Used as a context manager, axes_style applies the style only to the code
# inside the block.
# NOTE(review): the body of this `with` had lost its indentation (a syntax
# error as written). It is assumed to be the figure creation and the
# distplot only — confirm against the original notebook.
with sns.axes_style('darkgrid'):
    f, ax = plt.subplots(figsize=(15, 5))
    sns.distplot(bike.cnt)
# x and y are passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally for jointplot.
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='g')
# Offset the remaining spines from the axes and trim them to the tick range.
sns.despine(offset=15, trim=True)
# Reset to the seaborn defaults and draw the same plot again for comparison.
sns.set()
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='g')
# Colour-palette tour; palplot draws a palette as a row of swatches.
current_palette = sns.color_palette()  # the palette currently in effect
sns.palplot(current_palette)
# hls -> Hue, Lightness and Saturation
sns.palplot(sns.color_palette(palette='hls', n_colors=8))
sns.palplot(sns.hls_palette(n_colors=15, h=.99, l=0.5, s=.99))
# Interactive ColorBrewer pickers (notebook widgets).
sns.choose_colorbrewer_palette(data_type='sequential')
sns.choose_colorbrewer_palette(data_type='diverging')
# Sequential palettes: regular 'Blues' and its '_d' variant.
for _sequential_name in ('Blues', 'Blues_d'):
    sns.palplot(sns.color_palette(_sequential_name))
sns.choose_colorbrewer_palette(data_type='sequential')
# Cubehelix palettes; the author notes these are good for printing.
sns.palplot(sns.color_palette(palette='cubehelix', n_colors=12))
sns.palplot(sns.cubehelix_palette(n_colors=12))
# Keep the palette returned by the picker and use it for the hue colours of
# a PairGrid; the second assignment replaces the first.
h = sns.choose_colorbrewer_palette(data_type='diverging')
h = sns.choose_colorbrewer_palette(data_type='sequential')
g = sns.PairGrid(
    data=wine_data,
    x_vars=['chlorides', 'sulphates', 'fixed_acidity'],
    y_vars=['alcohol'],
    hue='pH',
    palette=h,
    height=8,
)
g.map(sns.scatterplot)  # the seaborn scatter is the author's preferred look
# g.add_legend()
# Inspect the current axes style, then override individual style parameters
# on top of the 'ticks' preset.
sns.axes_style()  # current style
sns.set_style(
    'ticks',
    {
        'xtick.major.size': 8,
        'xtick.color': '.15',
        'ytick.color': '.85',
        'ytick.major.size': 10,
        'axes.facecolor': 'm',
    },
)
# x and y are passed by keyword throughout: seaborn >= 0.12 accepts only
# `data` positionally, so jointplot(a, b) raises TypeError there.
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='k')
sns.set()  # reset everything to the seaborn defaults
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='k')
# Draw the same plot under each plotting context to compare how the
# elements are scaled.
for _context in ('talk', 'paper', 'notebook', 'poster'):
    sns.set_context(_context)
    sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='k')